VISUAL

Author

Pedro Pena

Dataset

Vamos a utilizar el dataset de carros de vega_datasets: https://vega.github.io/vega-datasets/

Code
import altair as alt
from vega_datasets import data
import pandas as pd

Carga del dataset

Code
# Read the donations CSV from GitHub, parsing the `date` column as datetimes.
donations = pd.read_csv(
    "https://raw.githubusercontent.com/erickedu85/dataset/master/donations.csv",
    parse_dates=['date'],
)
donations.head()
date sum refund_sum donations refunds avg max ytdsum ytdloss week_day
0 2020-01-02 62419.79 0.0 3342 0 18.677376 1040.00 879029.00 0.0 Thu
1 2020-01-03 37983.67 0.0 1949 0 19.488799 1000.00 917012.67 0.0 Fri
2 2020-01-04 26219.10 0.0 1337 0 19.610396 208.00 943231.77 0.0 Sat
3 2020-01-05 33856.07 0.0 1289 0 26.265376 5596.86 977087.84 0.0 Sun
4 2020-01-06 26447.11 0.0 1347 0 19.634083 699.67 1003534.95 0.0 Mon
Code
# Enable the 'dark' theme for the charts rendered after this cell.
# `alt.themes` is deprecated since altair 5.5.0; `alt.theme` is its replacement.
alt.theme.enable('dark')
C:\Users\pedro\AppData\Local\Temp\ipykernel_8384\1193503845.py:2: AltairDeprecationWarning:


Deprecated since `altair=5.5.0`. Use altair.theme instead.
Most cases require only the following change:

    # Deprecated
    alt.themes.enable('quartz')

    # Updated
    alt.theme.enable('quartz')

If your code registers a theme, make the following change:

    # Deprecated
    def custom_theme():
        return {'height': 400, 'width': 700}
    alt.themes.register('theme_name', custom_theme)
    alt.themes.enable('theme_name')

    # Updated
    @alt.theme.register('theme_name', enable=True)
    def custom_theme():
        return alt.theme.ThemeConfig(
            {'height': 400, 'width': 700}
        )

See the updated User Guide for further details:
    https://altair-viz.github.io/user_guide/api.html#theme
    https://altair-viz.github.io/user_guide/customization.html#chart-themes
ThemeRegistry.enable('dark')

Gráfico lineal

Code
# Line chart of the `sum` column against `date`, with pan/zoom enabled.
alt.Chart(donations).mark_line().encode(x='date', y='sum').interactive()

Gráfico de barras

Code
# Explicit weekday order for the y axis.
list_week_day = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

# Weekday whose summed `sum` column is the largest; its bar gets its own color.
top_day = donations.groupby('week_day')['sum'].agg('sum').idxmax()

chart = alt.Chart(donations).mark_bar().encode(
    x='sum(sum)',
    y=alt.Y('week_day', sort=list_week_day),
    color=alt.condition(
        alt.datum.week_day == top_day,
        alt.value('coral'),
        alt.value('steelblue'),
    ),
)

# Text labels derived from the bar chart, shifted 5px to the right of each bar.
texto = chart.mark_text(align='left', dx=5).encode(
    text=alt.Text('sum(sum)', format='$.3s'),
)

chart + texto

Técnicas de interacción

Tooltip

Code
from urllib.parse import quote_plus

cars = data.cars()
# Percent-encode each car name so spaces and punctuation form a valid
# query string in the search link used by the `href` channel below.
cars['URL'] = "https://google.com/search?q=" + cars['Name'].map(quote_plus)

# Step 1: interval brush; with resolve='intersect' the per-view brushes are
# combined by intersection.
brush = alt.selection_interval(resolve='intersect')

# Step 3: color by Origin inside the brush, light gray outside it.
condition = alt.condition(brush, 'Origin', alt.value('lightgray'))

# Alternative highlight (not used by the charts below): pass it as
# `opacity=opacity_condition` to fade unselected points instead.
opacity_condition = alt.condition(brush, alt.value(1), alt.value(0.3))

autos = alt.Chart(cars).mark_circle().encode(
    alt.X("Horsepower"),
    alt.Y("Miles_per_Gallon"),
    color=condition,
    href='URL',
    tooltip=[
        alt.Tooltip('Name', title='Nombre: ')
    ]
).add_params(brush)  # Step 2: attach the brush to the chart

# Same chart (brush, color, link and tooltip included) on two other axes.
autos_link = autos.encode(
    alt.X('Acceleration'),
    alt.Y('Weight_in_lbs')
)

autos | autos_link
C:\Users\pedro\AppData\Local\Temp\ipykernel_8384\1909135793.py:39: UserWarning:

Automatically deduplicated selection parameter with identical configuration. If you want independent parameters, explicitly name them differently (e.g., name='param1', name='param2'). See https://github.com/vega/altair/issues/3891

Bind legend

Code
# Point selection on `Origin`, bound to the legend.
click_legend = alt.selection_point(fields=['Origin'], bind='legend')

# Full opacity for the selected origin, 0.3 for the rest.
points_opacity_condition = alt.condition(click_legend, alt.value(1), alt.value(0.3))

points = (
    alt.Chart(cars)
    .mark_circle()
    .encode(
        x='Horsepower',
        y='Miles_per_Gallon',
        color='Origin',
        opacity=points_opacity_condition,
    )
    .add_params(click_legend)
)

points

Selección de 2 gráficos diferentes

Code
# --- Bars: the point selection on `Origin` is attached to this chart ---
click = alt.selection_point(fields=['Origin'])
bar_opacity_condition = alt.condition(click, alt.value(1), alt.value(0.3))

bars = (
    alt.Chart(cars)
    .mark_bar()
    .encode(
        x='count()',
        y='Origin',
        color='Origin',
        opacity=bar_opacity_condition,
        tooltip=['count()'],
    )
    .add_params(click)
)

# Bare expression kept from the original cell.
bars

# --- Points: reuse the same selection to fade non-selected origins ---
points_opacity_condition = alt.condition(click, alt.value(1), alt.value(0.3))

points = alt.Chart(cars).mark_circle().encode(
    x='Horsepower',
    y='Miles_per_Gallon',
    color='Origin',
    opacity=points_opacity_condition,
)

points & bars

Transform filter

Code
# Interval brush attached to the scatter plot.
brush = alt.selection_interval()
points_opacity_condition = alt.condition(brush, alt.value(1), alt.value(0.3))

points = (
    alt.Chart(cars)
    .mark_circle()
    .encode(
        x='Horsepower',
        y='Miles_per_Gallon',
        color='Origin',
        opacity=points_opacity_condition,
    )
    .add_params(brush)
)

# Bare expression kept from the original cell.
points

# Largest per-origin row count, used as a fixed upper bound for the x scale.
bar_chart_max = cars.groupby('Origin').size().max()

# Bar chart counting only the rows that pass the brush filter.
bars = (
    alt.Chart(cars)
    .mark_bar()
    .encode(
        x=alt.X('count()', scale=alt.Scale(domain=[0, bar_chart_max])),
        y='Origin',
        color='Origin',
        tooltip=['count()'],
    )
    .transform_filter(brush)
)

points & bars

Leyenda en varios gráficos

Code
# One legend-bound selection on `Origin` drives the opacity of both charts.
click_legend = alt.selection_point(fields=['Origin'], bind='legend')

points_opacity_condition = alt.condition(click_legend, alt.value(1), alt.value(0.3))

# The selection parameter is attached to the scatter plot.
points = (
    alt.Chart(cars)
    .mark_circle()
    .encode(
        x='Horsepower',
        y='Miles_per_Gallon',
        color='Origin',
        opacity=points_opacity_condition,
    )
    .add_params(click_legend)
)

bars_opacity_condition = alt.condition(click_legend, alt.value(1), alt.value(0.3))

# The bars only reference the selection through their opacity condition.
bars = alt.Chart(cars).mark_bar().encode(
    x='count()',
    y='Origin',
    color='Origin',
    opacity=bars_opacity_condition,
    tooltip=['count()'],
)

points & bars

Movies

Code
# Read the extended movies CSV from GitHub, parsing `Release_Date` as datetimes.
movies = pd.read_csv(
    "https://raw.githubusercontent.com/erickedu85/dataset/master/movies-extended.csv",
    parse_dates=['Release_Date'],
)
movies.head()
Title Worldwide_Gross Release_Date MPAA_Rating Major_Genre Rotten_Tomatoes_Rating IMDB_Rating
0 Boynton Beach Club 3127472 2006-03-24 R Romantic Comedy NaN NaN
1 Broken Arrow 148345997 1996-02-09 R Action 55.0 5.8
2 Brazil 9929135 1985-12-18 R Black Comedy 98.0 8.0
3 The Cable Guy 102825796 1996-06-14 PG-13 Comedy 52.0 5.8
4 Chain Reaction 60209334 1996-08-02 PG-13 Action 13.0 5.2
Code
# Dropdown widget listing the distinct genres in sorted order.
unique_genres = sorted(movies['Major_Genre'].unique())
dropdown_genres = alt.binding_select(
    options=unique_genres,
    name="Seleccionar genero: ",
)

# Radio-button widget listing the distinct MPAA ratings in sorted order.
unique_mpaaa = sorted(movies['MPAA_Rating'].unique())
radiobuttons_mpaa = alt.binding_radio(
    options=unique_mpaaa,
    name="Seleccionar MPAA Rating: ",
)

# Point selection over both fields, each bound to its own widget.
select_genre_mpaa = alt.selection_point(
    fields=['Major_Genre', 'MPAA_Rating'],
    bind={'Major_Genre': dropdown_genres, 'MPAA_Rating': radiobuttons_mpaa},
)

opacity_condition = alt.condition(select_genre_mpaa, alt.value(1), alt.value(0.1))

points = (
    alt.Chart(movies)
    .mark_circle()
    .encode(
        x='Rotten_Tomatoes_Rating',
        y='IMDB_Rating',
        color='Major_Genre',
        opacity=opacity_condition,
        tooltip=['Title'],
    )
    .add_params(select_genre_mpaa)
)

points

Slider

Code
# Upper bound of the slider: the largest worldwide gross in the data.
worldwide_gross_max = movies['Worldwide_Gross'].max()

# Range slider from 0 to the maximum gross, in steps of 10 million.
slider_gross = alt.binding_range(
    name='Worldwide Gross: ',
    min=0,
    max=worldwide_gross_max,
    step=10e6
)

# Give the slider-bound selection an initial threshold of 0. Without an
# initial value the selection starts empty, so the comparison below has no
# threshold to compare against until the slider is first moved.
select_worldwige_gross = alt.selection_point(
    fields=['Worldwide_Gross'],
    bind=slider_gross,
    value=[{'Worldwide_Gross': 0}]
)

# Full opacity for movies grossing more than the slider value, 0.1 otherwise.
opacity_condition = alt.condition(
    alt.datum.Worldwide_Gross > select_worldwige_gross.Worldwide_Gross,
    alt.value(1),
    alt.value(0.1)
)

points = alt.Chart(movies).mark_circle().encode(
    alt.X('Rotten_Tomatoes_Rating'),
    alt.Y('IMDB_Rating'),
    alt.Color('Major_Genre'),
    opacity=opacity_condition,
    tooltip=['Title', 'Rotten_Tomatoes_Rating', 'IMDB_Rating', 'Major_Genre']
).add_params(select_worldwige_gross).interactive()

points

Overview + details

Code
# Brush along x only, i.e. over Release_Date on the overview bars. A 2-D brush
# would also constrain the aggregated count() axis, which the scatter plot's
# rows do not carry.
brush = alt.selection_interval(encodings=['x'])
opacity_condition = alt.condition(
    brush,
    alt.value(1),
    alt.value(0.2)
)

# Overview: short bar chart of movie counts per release date. The brush is
# attached here only; the scatter plot just reads it in its opacity condition.
bars_slider = alt.Chart(movies).mark_bar().encode(
    alt.X('Release_Date'),
    alt.Y('count()'),
    opacity=opacity_condition
).properties(
    height=50
).add_params(brush)

# Detail: ratings scatter; movies outside the brushed date range are faded.
points = alt.Chart(movies).mark_circle().encode(
    alt.X('Rotten_Tomatoes_Rating'),
    alt.Y('IMDB_Rating'),
    alt.Color('Major_Genre'),
    opacity=opacity_condition,
    tooltip=['Title', 'Rotten_Tomatoes_Rating', 'IMDB_Rating', 'Major_Genre', 'Release_Date']
)

points & bars_slider
C:\Users\pedro\AppData\Local\Temp\ipykernel_8384\4091028901.py:26: UserWarning:

Automatically deduplicated selection parameter with identical configuration. If you want independent parameters, explicitly name them differently (e.g., name='param1', name='param2'). See https://github.com/vega/altair/issues/3891

Minimap - Release Date

Code
# The selection is used as the x-scale domain of the detail chart, so the
# brush is restricted to the x encoding (Release_Date).
select_date = alt.selection_interval(encodings=['x'])

# Shared base: mean worldwide gross per release date, as a line with points.
base = alt.Chart(movies).mark_line(point=True).encode(
    alt.X('Release_Date'),
    alt.Y('mean(Worldwide_Gross)')
).properties(
    width=500
)

# Detail view: same chart, with its x domain driven by the brushed range.
upper_chart = base.encode(
    alt.X('Release_Date', scale=alt.Scale(domain=select_date))
).properties(height=180)

# Minimap: short copy of the base chart that carries the brush.
lower_chart = base.add_params(select_date).properties(height=50)

upper_chart & lower_chart

Dataset movies from vega

Code
import altair as alt
import pandas as pd
from vega_datasets import data

# Load the movies dataset bundled with vega_datasets.
movies_cleaned = data.movies()
# head() is not the last expression of the cell; only info() prints below.
movies_cleaned.head()
# Print the column names, non-null counts and dtypes.
movies_cleaned.info()
<class 'pandas.DataFrame'>
RangeIndex: 3201 entries, 0 to 3200
Data columns (total 16 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Title                   3200 non-null   object 
 1   US_Gross                3194 non-null   float64
 2   Worldwide_Gross         3194 non-null   float64
 3   US_DVD_Sales            564 non-null    float64
 4   Production_Budget       3200 non-null   float64
 5   Release_Date            3201 non-null   str    
 6   MPAA_Rating             2596 non-null   str    
 7   Running_Time_min        1209 non-null   float64
 8   Distributor             2969 non-null   str    
 9   Source                  2836 non-null   str    
 10  Major_Genre             2926 non-null   str    
 11  Creative_Type           2755 non-null   str    
 12  Director                1870 non-null   str    
 13  Rotten_Tomatoes_Rating  2321 non-null   float64
 14  IMDB_Rating             2988 non-null   float64
 15  IMDB_Votes              2988 non-null   float64
dtypes: float64(8), object(1), str(7)
memory usage: 400.3+ KB
Code
# Convert the Release_Date column to datetimes in place.
movies_cleaned['Release_Date'] = movies_cleaned['Release_Date'].pipe(pd.to_datetime)

markline

Code
# Total US gross per release year as a line with point markers.
alt.Chart(movies_cleaned).mark_line(point=True).properties(
    width=600,
    height=300,
).encode(
    x='year(Release_Date)',
    y='sum(US_Gross)',
    tooltip=['year(Release_Date)', 'sum(US_Gross)'],
).interactive()

mark trail

Code
# Trail of total US gross per year; the size channel encodes the yearly mean.
alt.Chart(movies_cleaned).mark_trail().properties(
    width=600,
    height=300,
).encode(
    x='year(Release_Date)',
    y='sum(US_Gross)',
    size='mean(US_Gross)',
    tooltip=['year(Release_Date)', 'mean(US_Gross)'],
).interactive()

suma acumulada

Code
# Running total of US gross: a window transform sorted by release date sums
# US_Gross into the new `cumulative_us_gross` field.
alt.Chart(movies_cleaned).mark_line(point=True).transform_window(
    cumulative_us_gross='sum(US_Gross)',
    sort=[{"field": "Release_Date"}],
).properties(
    width=600,
    height=300,
).encode(
    x='Release_Date',
    y='cumulative_us_gross:Q',
    tooltip=['year(Release_Date):T'],
).interactive()

area

Code
# Blue area chart of mean US gross per year, basis-interpolated with a line on top.
alt.Chart(movies_cleaned).mark_area(
    line=True,
    interpolate='basis',
    color='blue',
).properties(
    width=600,
    height=300,
).encode(
    x='year(Release_Date)',
    y='mean(US_Gross)',
    tooltip=['year(Release_Date)', 'mean(US_Gross):Q'],
).interactive()

area gradiente de color

Code
# Same area chart, filled with a linear gradient from white (offset 0) to
# dark green (offset 1) along the y1=1 -> y2=0 direction.
alt.Chart(movies_cleaned).mark_area(
    color=alt.Gradient(
        gradient='linear',
        x1=1,
        x2=1,
        y1=1,
        y2=0,
        stops=[
            alt.GradientStop(color='white', offset=0),
            alt.GradientStop(color='darkgreen', offset=1),
        ],
    ),
    line=True,
    interpolate='basis',
).properties(
    width=600,
    height=300,
).encode(
    x='year(Release_Date)',
    y='mean(US_Gross)',
    tooltip=['year(Release_Date)', 'mean(US_Gross):Q'],
).interactive()

highlight

Code
# Hover selection on Major_Genre: set on pointerover, cleared on mouseout.
highlight = alt.selection_point(on="pointerover", fields=["Major_Genre"], clear="mouseout")

# One line per genre with hollow white-filled point markers; genres outside
# the selection fade to 0.1 opacity.
alt.Chart(movies_cleaned).mark_line(
    point=alt.OverlayMarkDef(filled=False, fill="white"),
).properties(
    width=600,
    height=300,
).encode(
    x='year(Release_Date)',
    y='mean(US_Gross)',
    color='Major_Genre',
    opacity=alt.condition(highlight, alt.value(1), alt.value(0.1)),
    tooltip=['year(Release_Date)', 'mean(US_Gross)'],
).add_params(highlight).interactive()

highlight area

Code
# d3 format string shared by the y axis and the tooltip.
format_gross = '$.3s'

# Hover selection on Major_Genre: set on pointerover, cleared on mouseout.
highlight = alt.selection_point(
    fields=["Major_Genre"],
    on="pointerover",
    clear="mouseout",
)

# Areas of total US gross per year colored by genre; genres outside the
# selection fade to 0.3 opacity.
alt.Chart(movies_cleaned).mark_area(line=True, interpolate='basis').properties(
    width=600,
    height=300,
).encode(
    alt.X('year(Release_Date)'),
    alt.Y('sum(US_Gross)', axis=alt.Axis(format=format_gross)),
    alt.Color('Major_Genre:N'),
    opacity=alt.condition(highlight, alt.value(1), alt.value(0.3)),
    tooltip=[
        alt.Tooltip('year(Release_Date)'),
        alt.Tooltip('Major_Genre'),
        alt.Tooltip('sum(US_Gross)', format=format_gross),
    ],
).add_params(highlight).interactive()

small multiple

Code
# d3 format string shared by the y axis and the tooltip.
format_gross = '$.3s'

# Hover selection on Major_Genre: set on pointerover, cleared on mouseout.
highlight = alt.selection_point(
    fields=["Major_Genre"],
    on="pointerover",
    clear="mouseout",
)

# Small multiples: one 70px-high area chart row per genre.
alt.Chart(movies_cleaned).mark_area(line=True, interpolate='basis').properties(
    width=600,
    height=70,
).encode(
    alt.X('year(Release_Date)'),
    alt.Y('sum(US_Gross)', title='Sum US Gross', axis=alt.Axis(format=format_gross)),
    alt.Row('Major_Genre:N'),
    alt.Color('Major_Genre:N'),
    opacity=alt.condition(highlight, alt.value(1), alt.value(0.3)),
    tooltip=[
        alt.Tooltip('year(Release_Date)'),
        alt.Tooltip('Major_Genre'),
        alt.Tooltip('sum(US_Gross)', format=format_gross),
    ],
).add_params(highlight).interactive()

streamgraph

Code
# d3 format string used by the tooltip.
format_gross = '$.3s'

# Hover selection on Major_Genre: set on pointerover, cleared on mouseout.
highlight = alt.selection_point(
    fields=["Major_Genre"],
    on="pointerover",
    clear="mouseout",
)

# Streamgraph: areas stacked with stack='center' and the y axis removed.
alt.Chart(movies_cleaned).mark_area(line=True, interpolate='basis').properties(
    width=600,
    height=300,
).encode(
    alt.X('year(Release_Date)'),
    alt.Y('sum(US_Gross)', stack='center', axis=None),
    alt.Color('Major_Genre:N'),
    opacity=alt.condition(highlight, alt.value(1), alt.value(0.3)),
    tooltip=[
        alt.Tooltip('year(Release_Date)'),
        alt.Tooltip('Major_Genre'),
        alt.Tooltip('sum(US_Gross)', format=format_gross),
    ],
).add_params(highlight).interactive()

heatmap

Code
# Heatmap of total US gross: day of month on x, month on y, color = sum.
alt.Chart(movies_cleaned).mark_rect().encode(
    x=alt.X('date(Release_Date):O', title='Day'),
    y=alt.Y('month(Release_Date):O', title='Month'),
    color='sum(US_Gross)',
    tooltip=['sum(US_Gross)'],
)